package ca.uwindsor.cs.deepweb.utility;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Command-line utility that pulls the bracketed headwords out of a GB2312
 * encoded dictionary text file and writes them, one per line, to a UTF-8
 * encoded file.
 *
 * A headword is the text enclosed in lenticular brackets (【 and 】) on a line.
 */
public class ChinesePhrasesExtractor {

	/** Input file read when no path is given on the command line. */
	private static final String DEFAULT_INPUT_PATH = "C:\\Users\\Jie\\Downloads\\《现代汉语词典》 商务印书馆.txt";

	/** Output file written when no path is given on the command line. */
	private static final String DEFAULT_OUTPUT_PATH = "C:\\Users\\Jie\\Downloads\\现代汉语词典.txt";

	/** Encoding of the dictionary file being read. */
	private static final String INPUT_CHARSET = "GB2312";

	/** Encoding of the phrase list being written. */
	private static final String OUTPUT_CHARSET = "UTF-8";

	/** Lines containing this marker are skipped entirely. */
	private static final String SKIP_MARKER = "noveldown";

	// The brackets are spelled as escapes so that matching does not depend on
	// the encoding javac assumes for this source file.
	/** Left lenticular bracket 【 that opens a headword. */
	private static final char OPEN_BRACKET = '\u3010';

	/** Right lenticular bracket 】 that closes a headword. */
	private static final char CLOSE_BRACKET = '\u3011';

	/**
	 * Reads the dictionary, extracts one headword per line and writes the
	 * resulting list to the output file. I/O failures are reported on standard
	 * error via a stack trace; the output file is only written once the whole
	 * input has been read successfully.
	 *
	 * @param args optional overrides: {@code args[0]} is the input path and
	 *             {@code args[1]} is the output path; when absent the built-in
	 *             default paths are used
	 */
	public static void main(String[] args) {
		String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
		String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

		try {
			StringBuilder phrases = new StringBuilder();

			// The stream is declared separately from the reader so that it is
			// still closed if the charset lookup in InputStreamReader fails.
			try (FileInputStream in = new FileInputStream(inputPath);
					BufferedReader reader = new BufferedReader(new InputStreamReader(in, INPUT_CHARSET))) {
				String line;
				while ((line = reader.readLine()) != null) {
					String phrase = extractPhrase(line);
					if (phrase != null) {
						phrases.append(phrase).append('\n');
					}
				}
			}

			try (FileOutputStream out = new FileOutputStream(outputPath)) {
				// trim() drops the trailing newline left by the loop above.
				out.write(phrases.toString().trim().getBytes(OUTPUT_CHARSET));
				out.flush();
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Extracts the headword from a single dictionary line.
	 *
	 * The line is trimmed first. The headword is the text between the first
	 * opening bracket and the first closing bracket that follows it. If the
	 * line has a closing bracket but no opening bracket, everything before the
	 * closing bracket is returned.
	 *
	 * @param line one raw line of the dictionary file; may be {@code null}
	 * @return the headword (possibly empty), or {@code null} when the line is
	 *         {@code null}, blank, contains the skip marker, or has no closing
	 *         bracket after the opening one
	 */
	static String extractPhrase(String line) {
		if (line == null) {
			return null;
		}
		String trimmed = line.trim();
		if (trimmed.isEmpty() || trimmed.contains(SKIP_MARKER)) {
			return null;
		}
		int open = trimmed.indexOf(OPEN_BRACKET);
		// Searching from open + 1 guarantees close > open, so substring cannot
		// be handed an inverted range when a stray closing bracket comes first.
		// With no opening bracket (open == -1) the search starts at index 0.
		int close = trimmed.indexOf(CLOSE_BRACKET, open + 1);
		if (close == -1) {
			return null;
		}
		return trimmed.substring(open + 1, close);
	}

}
